home *** CD-ROM | disk | FTP | other *** search
/ Collection of Tools & Utilities / Collection of Tools and Utilities.iso / archiver / huff_sc.zip / HUF.C < prev    next >
C/C++ Source or Header  |  1991-04-25  |  29KB  |  816 lines

  1. /*******************************************************************************
  2. *                                                                              *
  3. * HUF.C   by Shaun Case   April 1991                                           *
  4. *                                                                              *
  5. * Written in Borland C++ 2.0 under MS-DOS 3.3                                  *
  6. *                                                                              *
  7. * Uses Huffman encoding to compress a single file.                             *
  8. * This program is in the public domain.                                        *
  9. *                                                                              *
  10. * atman%ecst.csuchico.edu@RELAY.CS.NET                                         *
  11. *                                                                              *
  12. *                                                                              *
  13. *******************************************************************************/
  14.  
  15. #include <stdio.h>
  16. #include <math.h>
  17. #include <dir.h>                        /* for storing filename in output file      */
  18.  
  19. #define FALSE 0                         /* i'm still deciding on a style...         */
  20. #define TRUE !FALSE
  21.  
  22. /*
  23.  * for lots of interesting (?) diagnostics, uncomment the following:
  24. #define VERBOSE
  25.  *
  26.  */
  27.  
  28.  
  29. typedef struct chardata {               /* template for leaves and nodes in tree    */
  30.     short charnum;                      /* which character to be encoded            */
  31.                                         /* don't want it lost in the sort           */
  32.     unsigned long total;                /* total occurances in the file             */
  33.     short seq_length;                   /* length of this char's huffman code (bits)*/
  34.     short bit_sequence;                 /* the huffman code sequence for this char  */
  35.     double frequency;                   /* frequency of occurance, < 1.0            */
  36.     struct chardata *up, *left, *right; /* pointers to rest of tree                 */
  37. };
  38.  
  39. typedef struct decode_table_element {   /* template for decode table element (wow)  */
  40.     unsigned char letter;               /* which character to decode to             */
  41.     char spare;                         /* force 16-bit word alignment              */
  42.     short left;                         /* index of lower left element from tree    */
  43.     short right;                        /* index of lower right element from tree   */
  44. };
  45.  
  46.  
  47. int compare(const void*, const void*);  /* prototype for compare() for qsort()      */
  48.  
  49. struct chardata *huftable[256];         /* array that points to leaves in tree      */
  50. struct chardata *root=NULL;             /* future root of tree                      */
  51. struct decode_table_element *decode_table;  /* pointer to decode table              */
  52. short array_max_index=0;                /* max number of elements in array (to be   */
  53.                                         /* determined in create_decode_table()      */
  54.  
  55. long real_bit_total=0L;
  56. double avg_bits_per_symbol=0;           /* averages num of bits needed to represent */
  57. unsigned long  total;                   /* total number of unencoded bytes          */
  58. long real_bits_total = 0L;
  59. FILE *infile;                           /* file ptr to original file (uncompressed) */
  60. FILE *outfile;                          /* file ptr to output fiel   (compressed)   */
  61. char *infile_name;                      /* ptr to name of input file                */
  62. char *outfile_name;                     /* ptr to name of output file               */
  63.  
  64.  
  65. int main(int argc, char **argv)
  66. {
  67.  
  68. #ifdef VERBOSE
  69.     void show_encode_info(void);                    /* prototype   */
  70.     void show_encoded_file_size(void);              /* prototype   */
  71.     void show_decode_table(void);                   /* prototype   */
  72. #endif
  73.  
  74.     void dumptable(void);                           /* prototype   */
  75.     short create_tree(void);                        /* prototype   */
  76.     void gen_bit_sequences(struct chardata *node);  /* prototype   */
  77.     short create_decode_table(void);                /* prototype   */
  78.     short compress(void);                           /* prototype   */
  79.  
  80.     register short c;                               /* a character */
  81.     
  82.     if (argc != 3) {                                /* check command line arguments */
  83.         puts("'huf file1 file2' encodes file1 into file2.");
  84.         return 1;
  85.     }
  86.     puts("Huf by Shaun Case, 1991, Public Domain");
  87.     infile_name = argv[1];
  88.     outfile_name = argv[2];
  89.  
  90.     puts("Analyzing data.");
  91.  
  92.     for (c=0; c < 256; c++)                         /* initialize decode table      */
  93.     {
  94.         if ((huftable[c] = (struct chardata *)malloc(sizeof (struct chardata)))== NULL)
  95.         {
  96.             printf("Unable to allocate space for %dth huftable node.",c);
  97.             return 1;
  98.         }
  99.         huftable[c]->charnum   = c;                 /* need to know who we are after qsort() gets done with us */
  100.         huftable[c]->total     = 0L;
  101.         huftable[c]->frequency = 0;
  102.         huftable[c]->up        = NULL;
  103.         huftable[c]->left      = NULL;
  104.         huftable[c]->right     = NULL;
  105.     }
  106.  
  107.  
  108.     if ((infile=fopen(infile_name, "rb")) == NULL)  /* open the input file              */
  109.     {
  110.         printf("Unable to open %s.\n", infile_name);
  111.         return 1;
  112.     }
  113.  
  114.     while (!feof(infile))                           /* get character distribution data  */
  115.     {
  116.         c = fgetc(infile);
  117.  
  118.         if (feof(infile))
  119.             continue;
  120.  
  121.         ++total;
  122.         ++huftable[c]->total;                       /* increment total for char c       */
  123.     }
  124.  
  125.     if (total == 0L)                                /* handle empty file                */
  126.     {
  127.         puts("Input file is empty, aborting.");
  128.         return 0;
  129.     }
  130.  
  131.     fclose(infile);
  132.                                                     /* sort according to frequency of occurance */
  133.  
  134.     qsort((void *)huftable, (size_t)256, (size_t)sizeof(struct chardata *),
  135.         compare);
  136.  
  137.     dumptable();                                    /* fill huftable with distribution freqs    */
  138.  
  139.     if (create_tree() != TRUE)                      /* make the huffman tree (bit sequences)    */
  140.         return 1;
  141.  
  142. #ifdef VERBOSE
  143.     printf("\nFreq of root is %f.\n",root->frequency);  /* this should be 1.0 if all went well  */
  144.  
  145.     puts("\nBit sequences:\n\n");
  146. #endif
  147.  
  148.     avg_bits_per_symbol = 0;
  149.     gen_bit_sequences(root);                        /* build bit sequences, stuff seqs &        */
  150.                                                     /* lengths into associated leaves           */
  151.  
  152. #ifdef VERBOSE
  153.     printf("\n\nActual bits per symbol = %f", avg_bits_per_symbol);
  154.     printf("\nActual final data size w/o table should be %f\n",
  155.         avg_bits_per_symbol * (double)total / ((double) 8));
  156.  
  157.     show_encoded_file_size();
  158. #endif
  159.  
  160.     puts("Building decode table...");
  161.     if (create_decode_table() != TRUE)              /* create decode array from tree            */
  162.     {
  163.         puts("Unable to allocate space for decode table, exiting.");
  164.         return 1;
  165.     }
  166.  
  167.     printf("Decode table built, contains %d elements.\n",array_max_index + 1);
  168.  
  169. #ifdef VERBOSE
  170.     show_decode_table();
  171.     show_encode_info();
  172. #endif
  173.  
  174.     if (compress() != 0)                            /* now that we have all necessary info,     */
  175.         return 1;                                   /* perform the compression.                 */
  176.  
  177.     puts("Done.");
  178.     return 0;
  179.  
  180. }
  181.  
  182. /*****************************************************************************
  183.  * this code is going to be a little strange -- we have an array of items    *
  184.  * that are going to be the leaves of the tree, and we have to go upward     *
  185.  * linking them together to make the root.  For algorithm, see my notebook,  *
  186.  * or look up huffman compression in a good algorithms book, or see          *
  187.  * huffman's paper -- "A Method for the Construction of M